#loading packages
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.5     ✓ stringr 1.4.0
## ✓ tidyr   1.1.2     ✓ forcats 0.5.0
## ✓ readr   1.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()
library(ggridges) # for joy plots
library(plotly) 
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(gganimate)     # for adding animation layers to ggplots
library(gifski)        # for creating the gif (don't need to load this library every time,but need it installed)
#loading data
# Reads the TidyTuesday (2020-01-21) Spotify songs CSV straight from GitHub,
# so this step needs network access when it runs.
spotify <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-21/spotify_songs.csv')
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   track_id = col_character(),
##   track_name = col_character(),
##   track_artist = col_character(),
##   track_album_id = col_character(),
##   track_album_name = col_character(),
##   track_album_release_date = col_character(),
##   playlist_name = col_character(),
##   playlist_id = col_character(),
##   playlist_genre = col_character(),
##   playlist_subgenre = col_character()
## )
## ℹ Use `spec()` for the full column specifications.

Introduction & Background

Why did we analyze Spotify? Why is the data significant, and why should people care? Introduce the data to the audience.

# Preliminary look at track popularity: one boxplot per playlist genre, with a
# blue vertical reference line at the median popularity across all tracks.

# Compute the overall median once. Mapping it inside aes() evaluates it against
# the full data and recycles the value to every row, which draws one
# overlapping line per track instead of a single line.
median_popularity <- median(spotify$track_popularity, na.rm = TRUE)

prelim_graph <- spotify %>%
  ggplot(aes(y = playlist_genre, x = track_popularity)) +
  geom_boxplot() +
  geom_vline(xintercept = median_popularity, color = "blue") +
  labs(
    title = "Song Popularity by Genre",
    x = "", y = "",
    # The median is interpolated so the text cannot drift from the line drawn.
    subtitle = paste0(
      "Song popularity is measured from 0-100, with higher numbers being ",
      "indicative of more popularity.\n",
      "Highest median popularities belong to pop and latin with an overall ",
      "median popularity of ", median_popularity
    ),
    caption = "Alex Ismail, Malek Kaloti, Brian Lee"
  ) +
  theme_classic() +
  theme(
    plot.title.position = "plot",
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 10, face = "italic")
  )

prelim_graph

# Names of the audio-feature columns, taken by position (columns 12 to 23).
# NOTE(review): positional indexing breaks if the column order of the CSV
# changes - confirm these are the intended columns.
feature_names <- names(spotify)[12:23]

# Density of every audio feature, one facet per feature and one curve per
# playlist genre. The data is reshaped to long form so facet_wrap() can split
# on the feature name.
density_plot <- spotify %>%
  # all_of() marks feature_names as an external character vector, which
  # removes the tidyselect ambiguity between a column and a variable.
  select(playlist_genre, all_of(feature_names)) %>%
  pivot_longer(cols = all_of(feature_names)) %>%
  ggplot(aes(x = value)) +
  geom_density(aes(color = playlist_genre), alpha = 0.5) +
  facet_wrap(~name, ncol = 3, scales = "free") +
  labs(title = "Spotify Audio Feature Density - by Genre",
       x = "", y = "density") +
  theme(axis.text.y = element_blank())
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(feature_names)` instead of `feature_names` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
# Render the faceted density plot through plotly's ggplotly() converter.
ggplotly(density_plot)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Ridgeline densities of track popularity per playlist genre, restricted to
# tracks with a popularity of 75 or more.
ggplot(
  data = filter(spotify, track_popularity >= 75),
  mapping = aes(x = track_popularity, y = playlist_genre)
) +
  geom_density_ridges() +
  theme_ridges() +
  labs(x = "Popularity", y = "Playlist Genre")
## Picking joint bandwidth of 1.37

#get rid of axes, add a more descriptive subtitle

Data Collection

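Data retrieved from the TidyTuesday repository on GitHub (https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-01-21).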

R&B Music: an Analysis

# Popular R&B tracks: rows whose playlist genre is "r&b" and whose popularity
# is 75 or more, with the three identifier columns removed.
randb <- spotify %>%
  filter(playlist_genre == "r&b", track_popularity >= 75) %>%
  select(-c(track_id, track_album_id, playlist_id))

randb
## # A tibble: 530 x 20
##    track_name track_artist track_popularity track_album_name track_album_rel…
##    <chr>      <chr>                   <dbl> <chr>            <chr>           
##  1 Life Is G… Future                     93 Life Is Good (f… 2020-01-10      
##  2 Ayy Macar… Tyga                       91 Ayy Macarena     2019-11-13      
##  3 HIGHEST I… Travis Scott               89 JACKBOYS         2019-12-27      
##  4 FML        Arizona Zer…               82 Living Facts     2018-06-03      
##  5 OUT WEST … JACKBOYS                   87 JACKBOYS         2019-12-27      
##  6 Out Of Yo… French Mont…               75 MONTANA          2019-12-06      
##  7 GANG GANG  JACKBOYS                   84 JACKBOYS         2019-12-27      
##  8 Suicidal   YNW Melly                  90 Melly vs. Melvin 2019-11-22      
##  9 Immortal   21 Savage                  83 Immortal         2019-10-31      
## 10 Lalala     Y2K                        91 Lalala           2019-06-28      
## # … with 520 more rows, and 15 more variables: playlist_name <chr>,
## #   playlist_genre <chr>, playlist_subgenre <chr>, danceability <dbl>,
## #   energy <dbl>, key <dbl>, loudness <dbl>, mode <dbl>, speechiness <dbl>,
## #   acousticness <dbl>, instrumentalness <dbl>, liveness <dbl>, valence <dbl>,
## #   tempo <dbl>, duration_ms <dbl>
# Animated popularity density for the popular R&B tracks, stepping through one
# playlist subgenre at a time; the subtitle shows the current subgenre.
ggplot(
  data = randb,
  mapping = aes(
    x = track_popularity,
    color = playlist_subgenre,
    fill = playlist_subgenre
  )
) +
  geom_density(alpha = 0.1) +
  labs(
    title = "ADD TITLE",
    subtitle = "R&B Subgenre: {closest_state}"
  ) +
  transition_states(
    states = playlist_subgenre,
    transition_length = 3,
    state_length = 1
  )
#get rid of axes, make subtitle descriptive
# No animation is passed to anim_save(), so it falls back to its default (the
# most recently rendered animation); the saved GIF is then embedded.
anim_save("randb_density.gif")
knitr::include_graphics("randb_density.gif")

Why do hip pop and urban contemporary have such similar density curves? For this section I want to look at the features of these two subgenres specifically.

# Mean popularity and audio features for the "hip pop" and
# "urban contemporary" subgenres, rendered as a table.

# Columns averaged per subgenre.
# NOTE(review): acousticness is not in this list - confirm that is intentional.
compared_features <- c(
  "track_popularity", "danceability", "energy", "key", "loudness", "mode",
  "speechiness", "instrumentalness", "liveness", "valence", "tempo",
  "duration_ms"
)

randb %>%
  # %in% tests set membership. Comparing with `==` against a length-2 vector
  # recycles that vector down the rows, so matching tracks are silently
  # dropped and R warns about mismatched lengths.
  filter(playlist_subgenre %in% c("hip pop", "urban contemporary")) %>%
  group_by(playlist_subgenre) %>%
  summarise(
    across(all_of(compared_features), ~ mean(.x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  knitr::kable()
## Warning in playlist_subgenre == c("hip pop", "urban contemporary"): longer
## object length is not a multiple of shorter object length

## Warning in playlist_subgenre == c("hip pop", "urban contemporary"): longer
## object length is not a multiple of shorter object length
playlist_subgenre track_popularity danceability energy key loudness mode speechiness instrumentalness liveness valence tempo duration_ms
hip pop 82.62411 0.6985887 0.6000979 5.014184 -6.380170 0.6808511 0.1304929 0.0120022 0.1580014 0.4780922 116.8704 200865.0
urban contemporary 81.98039 0.6823333 0.5401578 5.696078 -7.651382 0.4803922 0.1340971 0.0135849 0.1504039 0.4606735 121.0225 207035.3
# maybe somehow graph this??